This document explores the mango data from SIAP (Mexico). The data have been summarized at the state level. 26 states report mango production during the period 1980 - 2016. There are 962 observations in total (26 states x 37 years): one observation per year for each state.
Panel data
A data frame with all years and all states that grow mango.
# Skeleton of the state-by-year panel: one row for every combination of a
# mango-reporting state and a year in the study period.
state_names <- c(
  "baja california", "baja california sur", "campeche",
  "chiapas", "colima", "durango",
  "guanajuato", "guerrero", "hidalgo", "jalisco",
  "mexico", "michoacan", "morelos", "nayarit", "oaxaca",
  "puebla", "queretaro", "quintana roo",
  "san luis potosi", "sinaloa", "sonora", "tabasco",
  "tamaulipas", "veracruz", "yucatan", "zacatecas"
) # 26 states report mango production
study_years <- 1980:2016

# Repeat counts are taken from the vectors themselves, so editing the state
# list or the year range cannot leave the two columns with different lengths.
period <- tibble(year = rep(study_years, times = length(state_names)))
states <- tibble(state = rep(state_names, times = length(study_years))) %>%
  arrange(state)

# After sorting, each state fills one contiguous run of rows (one per year),
# which lines up with the repeating cycle of years held in `period`.
states_period <- cbind(states, period)
Plots
Production
# Per-state descriptive statistics of mango production (`ag_prod`), rendered
# as a table. Missing values are dropped within each state.
mango %>%
  group_by(state) %>%
  summarise(
    max_prod = max(ag_prod, na.rm = TRUE),
    min_prod = min(ag_prod, na.rm = TRUE),
    # Built from the two columns above rather than rescanning the data.
    range_prod = max_prod - min_prod,
    sd_prod = sd(ag_prod, na.rm = TRUE),
    mean_prod = mean(ag_prod, na.rm = TRUE),
    median_prod = median(ag_prod, na.rm = TRUE),
    # The result has one row per state; say so explicitly instead of relying
    # on the default (which also prints an "ungrouping output" message).
    .groups = "drop"
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| baja california |
0.00 |
0.00 |
0.00 |
NA |
0.0000 |
0.000 |
| baja california sur |
9913.65 |
578.00 |
9335.65 |
2230.2162 |
3563.9111 |
2789.800 |
| campeche |
42933.70 |
3930.00 |
39003.70 |
11796.7935 |
23410.3406 |
26213.720 |
| chiapas |
238429.55 |
37760.00 |
200669.55 |
63980.3042 |
123791.2263 |
131164.700 |
| colima |
91294.00 |
9623.00 |
81671.00 |
19869.9166 |
48479.2611 |
49431.560 |
| durango |
1576.42 |
65.00 |
1511.42 |
492.2448 |
818.9611 |
720.000 |
| guanajuato |
405.00 |
0.00 |
405.00 |
143.2752 |
205.7000 |
211.000 |
| guerrero |
372282.78 |
36575.00 |
335707.78 |
93659.3813 |
215997.6649 |
189171.000 |
| hidalgo |
2568.00 |
0.00 |
2568.00 |
534.0370 |
701.3468 |
588.965 |
| jalisco |
113607.55 |
27022.00 |
86585.55 |
14033.8712 |
51651.2451 |
48642.000 |
| mexico |
9620.00 |
1545.00 |
8075.00 |
1583.3664 |
4304.7063 |
4499.750 |
| michoacan |
144675.07 |
19434.00 |
125241.07 |
36260.2297 |
95737.3186 |
109750.000 |
| morelos |
10726.00 |
5079.99 |
5646.01 |
1327.1275 |
7905.7977 |
8055.000 |
| nayarit |
364814.00 |
52362.00 |
312452.00 |
81069.9913 |
186393.2317 |
181319.350 |
| oaxaca |
275120.00 |
73573.00 |
201547.00 |
33085.2491 |
174068.0546 |
177690.000 |
| puebla |
2507.00 |
122.70 |
2384.30 |
603.4554 |
1059.7005 |
920.000 |
| queretaro |
1414.00 |
102.00 |
1312.00 |
297.5532 |
522.3822 |
460.000 |
| quintana roo |
230.00 |
20.00 |
210.00 |
63.8367 |
114.8667 |
102.000 |
| san luis potosi |
19429.00 |
775.00 |
18654.00 |
3118.7475 |
4240.9571 |
4008.000 |
| sinaloa |
339530.40 |
11897.00 |
327633.40 |
79157.4889 |
158816.6494 |
158796.000 |
| sonora |
3916.00 |
0.00 |
3916.00 |
676.7596 |
539.5373 |
349.500 |
| tabasco |
9348.00 |
980.30 |
8367.70 |
2557.4636 |
3826.4303 |
3152.500 |
| tamaulipas |
12984.98 |
0.00 |
12984.98 |
3391.8427 |
6345.0753 |
6737.000 |
| veracruz |
311128.00 |
87844.80 |
223283.20 |
65246.1948 |
189157.0960 |
182775.000 |
| yucatan |
16149.00 |
1113.40 |
15035.60 |
2807.8291 |
5158.2930 |
5086.000 |
| zacatecas |
1419.00 |
21.00 |
1398.00 |
298.1457 |
334.0278 |
169.500 |
# Box plot of the production values observed in each state.
ggplot(mango, aes(x = state, y = ag_prod)) +
  geom_boxplot() +
  labs(x = "State", y = "Production (tonnes)") +
  # scale_y_continuous(labels = comma) +
  # State names are rotated to vertical so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

# For each state, count the rows that have a non-missing production value.
# `.groups = "drop"` makes the ungrouped result explicit and avoids the
# "ungrouping output" message in the rendered report.
number_obs <- mango %>%
  group_by(state) %>%
  summarise(obs = sum(!is.na(ag_prod)), .groups = "drop")
## `summarise()` ungrouping output (override with `.groups` argument)
# States with at least 35 non-missing production observations (`obs` is an
# integer count, so this is the same cut-off as "more than 34").
mango_complete <- filter(number_obs, obs >= 35)
mango_complete
## # A tibble: 20 x 2
## state obs
## <chr> <int>
## 1 baja california sur 35
## 2 campeche 35
## 3 chiapas 35
## 4 colima 35
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 35
## 9 michoacan 35
## 10 morelos 35
## 11 nayarit 35
## 12 oaxaca 35
## 13 puebla 37
## 14 queretaro 37
## 15 san luis potosi 35
## 16 sinaloa 35
## 17 sonora 37
## 18 veracruz 35
## 19 yucatan 37
## 20 zacatecas 36
# Faceted time series of mango production: one panel per state, each with
# its own y-axis range. Panels of the states listed in `mango_complete` are
# outlined in red.
mango_ts <- mango %>%
  ggplot(aes(year, ag_prod)) +
  geom_line() +
  ylab("Production (tonnes)") +
  xlab("Years") +
  ggtitle("Mango Production 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  # Border layer. `mango_complete` has one row per qualifying state, so one
  # rectangle is drawn per panel instead of one per observation.
  # `inherit.aes = FALSE` is needed because that table only carries `state`
  # and `obs`, not the `year` / `ag_prod` columns mapped by the main plot.
  geom_rect(
    data = mango_complete,
    inherit.aes = FALSE,
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf,
    ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Alternative with a shared y axis: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).

Yield
# Per-state descriptive statistics of mango yield (`ag_yield`), rendered as a
# table. Missing values are dropped within each state.
#
# A state with no yield values at all would make max()/min() warn and return
# -Inf/Inf (and mean() return NaN). Those cases are reported as NA instead, so
# the table never shows infinite extremes or an infinite range.
mango %>%
  group_by(state) %>%
  summarise(
    # TRUE when the state has at least one non-missing yield value.
    has_yield = any(!is.na(ag_yield)),
    max_yield = if (has_yield) max(ag_yield, na.rm = TRUE) else NA_real_,
    min_yield = if (has_yield) min(ag_yield, na.rm = TRUE) else NA_real_,
    range_yield = max_yield - min_yield,
    sd_yield = sd(ag_yield, na.rm = TRUE),
    mean_yield = if (has_yield) mean(ag_yield, na.rm = TRUE) else NA_real_,
    median_yield = median(ag_yield, na.rm = TRUE),
    # The result has one row per state; say so explicitly instead of relying
    # on the default (which also prints an "ungrouping output" message).
    .groups = "drop"
  ) %>%
  # The helper flag is only needed for the guards above.
  select(-has_yield) %>%
  knitr::kable()
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## `summarise()` ungrouping output (override with `.groups` argument)
| baja california |
-Inf |
Inf |
-Inf |
NA |
NaN |
NA |
| baja california sur |
13.11 |
2.98 |
10.13 |
2.4665390 |
7.940286 |
7.980 |
| campeche |
16.23 |
3.54 |
12.69 |
3.1033450 |
11.527143 |
12.060 |
| chiapas |
16.45 |
6.36 |
10.09 |
2.7139152 |
9.633714 |
8.430 |
| colima |
18.93 |
6.50 |
12.43 |
3.4242340 |
12.265429 |
12.870 |
| durango |
6.50 |
2.14 |
4.36 |
1.1974801 |
4.440571 |
4.320 |
| guanajuato |
8.58 |
4.04 |
4.54 |
1.3982515 |
7.222500 |
7.480 |
| guerrero |
21.80 |
10.14 |
11.66 |
2.4896751 |
12.723429 |
11.640 |
| hidalgo |
13.52 |
4.00 |
9.52 |
1.9300048 |
7.066061 |
7.170 |
| jalisco |
14.59 |
7.03 |
7.56 |
1.5088998 |
9.834571 |
9.500 |
| mexico |
15.93 |
5.49 |
10.44 |
1.9393346 |
8.536000 |
8.390 |
| michoacan |
9.47 |
5.09 |
4.38 |
0.9597775 |
6.655143 |
6.380 |
| morelos |
20.04 |
9.57 |
10.47 |
2.4818776 |
13.991143 |
14.230 |
| nayarit |
53.97 |
6.70 |
47.27 |
7.5669918 |
12.153714 |
10.990 |
| oaxaca |
37.48 |
7.34 |
30.14 |
5.5883251 |
12.836000 |
11.400 |
| puebla |
12.23 |
5.33 |
6.90 |
1.5156813 |
8.591351 |
8.230 |
| queretaro |
19.64 |
2.45 |
17.19 |
2.6846601 |
7.150540 |
6.700 |
| quintana roo |
10.00 |
2.23 |
7.77 |
2.8161105 |
5.307333 |
4.590 |
| san luis potosi |
12.72 |
3.47 |
9.25 |
1.9822870 |
8.588286 |
8.250 |
| sinaloa |
14.99 |
4.02 |
10.97 |
2.7130059 |
10.136000 |
10.120 |
| sonora |
22.00 |
1.00 |
21.00 |
5.6936563 |
13.683143 |
14.390 |
| tabasco |
20.68 |
4.90 |
15.78 |
3.1584875 |
7.416250 |
6.055 |
| tamaulipas |
15.24 |
2.21 |
13.03 |
2.7185624 |
6.505758 |
5.700 |
| veracruz |
10.13 |
3.54 |
6.59 |
1.5197955 |
7.102571 |
7.360 |
| yucatan |
24.39 |
9.20 |
15.19 |
2.7960165 |
13.474054 |
13.300 |
| zacatecas |
15.42 |
2.53 |
12.89 |
3.5636149 |
7.518333 |
6.625 |
# Box plot of the yield values observed in each state.
ggplot(mango, aes(x = state, y = ag_yield)) +
  geom_boxplot() +
  labs(x = "State", y = "Yield (tonnes/ha)") +
  # State names are rotated to vertical so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
## Warning: Removed 134 rows containing non-finite values (stat_boxplot).

# For each state, count the rows that have a non-missing yield value.
# `.groups = "drop"` makes the ungrouped result explicit and avoids the
# "ungrouping output" message in the rendered report.
number_obs <- mango %>%
  group_by(state) %>%
  summarise(obs = sum(!is.na(ag_yield)), .groups = "drop")
## `summarise()` ungrouping output (override with `.groups` argument)
# States with at least 35 non-missing yield observations (`obs` is an
# integer count, so this is the same cut-off as "more than 34").
mango_complete <- filter(number_obs, obs >= 35)
mango_complete
## # A tibble: 20 x 2
## state obs
## <chr> <int>
## 1 baja california sur 35
## 2 campeche 35
## 3 chiapas 35
## 4 colima 35
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 35
## 9 michoacan 35
## 10 morelos 35
## 11 nayarit 35
## 12 oaxaca 35
## 13 puebla 37
## 14 queretaro 37
## 15 san luis potosi 35
## 16 sinaloa 35
## 17 sonora 35
## 18 veracruz 35
## 19 yucatan 37
## 20 zacatecas 36
# Faceted time series of mango yield: one panel per state, each with its own
# y-axis range. Panels of the states listed in `mango_complete` are outlined
# in red.
mango_ts <- mango %>%
  ggplot(aes(year, ag_yield)) +
  geom_line() +
  ylab("Yield (tonnes/ha)") +
  xlab("Years") +
  ggtitle("Mango Yields 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  # Border layer. `mango_complete` has one row per qualifying state, so one
  # rectangle is drawn per panel instead of one per observation.
  # `inherit.aes = FALSE` is needed because that table only carries `state`
  # and `obs`, not the `year` / `ag_yield` columns mapped by the main plot.
  geom_rect(
    data = mango_complete,
    inherit.aes = FALSE,
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf,
    ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Alternative with a shared y axis: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 37 rows containing missing values (geom_path).

Area
# Per-state descriptive statistics of harvested area (`ag_harv`), rendered as
# a table. Missing values are dropped within each state.
mango %>%
  group_by(state) %>%
  summarise(
    max_area = max(ag_harv, na.rm = TRUE),
    min_area = min(ag_harv, na.rm = TRUE),
    # Built from the two columns above rather than rescanning the data.
    range_area = max_area - min_area,
    sd_area = sd(ag_harv, na.rm = TRUE),
    mean_area = mean(ag_harv, na.rm = TRUE),
    median_area = median(ag_harv, na.rm = TRUE),
    # The result has one row per state; say so explicitly instead of relying
    # on the default (which also prints an "ungrouping output" message).
    .groups = "drop"
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| baja california |
0.00 |
0.00 |
0.00 |
NA |
0.00000 |
0.000 |
| baja california sur |
1058.50 |
194.00 |
864.50 |
280.88033 |
482.87432 |
343.000 |
| campeche |
2746.00 |
1008.00 |
1738.00 |
656.95773 |
1917.71714 |
2172.500 |
| chiapas |
32840.57 |
4720.00 |
28120.57 |
8545.53501 |
14094.29543 |
15534.000 |
| colima |
5357.00 |
1476.00 |
3881.00 |
931.14604 |
3809.70556 |
3837.625 |
| durango |
418.00 |
11.00 |
407.00 |
134.83013 |
197.88571 |
120.000 |
| guanajuato |
50.00 |
0.00 |
50.00 |
18.14632 |
28.20000 |
33.000 |
| guerrero |
24738.40 |
3429.00 |
21309.40 |
6195.62129 |
16828.66486 |
16993.000 |
| hidalgo |
285.00 |
0.00 |
285.00 |
50.84878 |
96.08824 |
102.000 |
| jalisco |
7786.84 |
3444.00 |
4342.84 |
867.18085 |
5225.99086 |
5135.000 |
| mexico |
668.00 |
222.00 |
446.00 |
118.41836 |
490.45270 |
523.500 |
| michoacan |
22520.24 |
2053.00 |
20467.24 |
6375.98865 |
14987.50086 |
18080.000 |
| morelos |
955.00 |
350.93 |
604.07 |
174.28874 |
577.96297 |
565.000 |
| nayarit |
25032.41 |
6307.00 |
18725.41 |
5785.05831 |
16079.22571 |
17795.000 |
| oaxaca |
18193.00 |
5663.00 |
12530.00 |
3382.99136 |
14564.89371 |
14800.000 |
| puebla |
227.00 |
23.00 |
204.00 |
52.74027 |
119.24324 |
122.000 |
| queretaro |
154.00 |
12.00 |
142.00 |
30.09507 |
73.18919 |
68.000 |
| quintana roo |
52.00 |
5.00 |
47.00 |
15.03266 |
24.53333 |
22.000 |
| san luis potosi |
1528.00 |
125.00 |
1403.00 |
260.78664 |
467.67568 |
412.000 |
| sinaloa |
31603.79 |
1158.00 |
30445.79 |
8395.77506 |
16643.79343 |
15073.000 |
| sonora |
289.00 |
0.00 |
289.00 |
48.82309 |
39.97297 |
29.000 |
| tabasco |
1450.00 |
163.00 |
1287.00 |
306.82520 |
512.90625 |
432.500 |
| tamaulipas |
2024.00 |
0.00 |
2024.00 |
390.58291 |
963.97278 |
1076.000 |
| veracruz |
38765.00 |
17771.21 |
20993.79 |
6352.42564 |
26433.37200 |
26769.500 |
| yucatan |
1052.00 |
96.00 |
956.00 |
217.58591 |
392.02054 |
376.200 |
| zacatecas |
92.00 |
4.00 |
88.00 |
16.94997 |
39.11111 |
38.000 |
# Box plot of the harvested-area values observed in each state.
mango %>%
  ggplot(aes(state, ag_harv)) +
  geom_boxplot() +
  # Harvested area is measured in hectares; the previous label said "tonnes",
  # which contradicted the area time-series plot of the same variable.
  ylab("Area harvested (ha)") +
  xlab("State") +
  # State names are rotated to vertical so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 116 rows containing non-finite values (stat_boxplot).

# For each state, count the rows that have a non-missing harvested-area
# value. `.groups = "drop"` makes the ungrouped result explicit and avoids
# the "ungrouping output" message in the rendered report.
number_obs <- mango %>%
  group_by(state) %>%
  summarise(obs = sum(!is.na(ag_harv)), .groups = "drop")
## `summarise()` ungrouping output (override with `.groups` argument)
# States with at least 35 non-missing harvested-area observations (`obs` is
# an integer count, so this is the same cut-off as "more than 34").
mango_complete <- filter(number_obs, obs >= 35)
mango_complete
## # A tibble: 21 x 2
## state obs
## <chr> <int>
## 1 baja california sur 37
## 2 campeche 35
## 3 chiapas 35
## 4 colima 36
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 37
## 9 michoacan 35
## 10 morelos 37
## # … with 11 more rows
# Faceted time series of harvested area: one panel per state, each with its
# own y-axis range. Panels of the states listed in `mango_complete` are
# outlined in red.
mango_ts <- mango %>%
  ggplot(aes(year, ag_harv)) +
  geom_line() +
  ylab("Area harvested (ha)") +
  xlab("Years") +
  ggtitle("Mango - Area Harvested (ha) 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  # Border layer. `mango_complete` has one row per qualifying state, so one
  # rectangle is drawn per panel instead of one per observation.
  # `inherit.aes = FALSE` is needed because that table only carries `state`
  # and `obs`, not the `year` / `ag_harv` columns mapped by the main plot.
  geom_rect(
    data = mango_complete,
    inherit.aes = FALSE,
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf,
    ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Alternative with a shared y axis: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).

Losses
# Per-state descriptive statistics of losses (`ag_losses`), rendered as a
# table. Missing values are dropped within each state.
mango %>%
  group_by(state) %>%
  summarise(
    max_losses = max(ag_losses, na.rm = TRUE),
    min_losses = min(ag_losses, na.rm = TRUE),
    # Built from the two columns above rather than rescanning the data.
    range_losses = max_losses - min_losses,
    sd_losses = sd(ag_losses, na.rm = TRUE),
    mean_losses = mean(ag_losses, na.rm = TRUE),
    median_losses = median(ag_losses, na.rm = TRUE),
    # The result has one row per state; say so explicitly instead of relying
    # on the default (which also prints an "ungrouping output" message).
    .groups = "drop"
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| baja california |
8.0 |
8 |
0.0 |
NA |
8.000000 |
8.0 |
| baja california sur |
554.5 |
0 |
554.5 |
182.956588 |
155.925714 |
34.0 |
| campeche |
1082.0 |
0 |
1082.0 |
297.118707 |
300.334286 |
191.0 |
| chiapas |
7770.0 |
0 |
7770.0 |
2400.817144 |
2008.847059 |
1362.0 |
| colima |
1837.0 |
0 |
1837.0 |
388.772047 |
352.538857 |
236.0 |
| durango |
64.0 |
0 |
64.0 |
14.096730 |
6.969697 |
0.0 |
| guanajuato |
30.0 |
0 |
30.0 |
11.972190 |
9.000000 |
5.0 |
| guerrero |
4326.0 |
0 |
4326.0 |
1159.913350 |
1046.157143 |
935.0 |
| hidalgo |
105.0 |
0 |
105.0 |
31.117659 |
13.393939 |
0.0 |
| jalisco |
1952.0 |
0 |
1952.0 |
447.142025 |
465.228571 |
407.0 |
| mexico |
215.0 |
0 |
215.0 |
59.744350 |
36.228571 |
0.0 |
| michoacan |
8657.0 |
4 |
8653.0 |
1651.087434 |
2833.640286 |
2771.0 |
| morelos |
479.0 |
0 |
479.0 |
109.128580 |
53.780000 |
11.0 |
| nayarit |
4781.0 |
0 |
4781.0 |
1116.356040 |
1201.952000 |
995.0 |
| oaxaca |
9376.0 |
0 |
9376.0 |
1873.788262 |
1315.271429 |
880.0 |
| puebla |
64.0 |
0 |
64.0 |
19.373247 |
10.028571 |
0.0 |
| queretaro |
168.0 |
0 |
168.0 |
44.659157 |
26.828571 |
0.0 |
| quintana roo |
63.0 |
5 |
58.0 |
18.293116 |
31.066667 |
28.0 |
| san luis potosi |
891.0 |
0 |
891.0 |
185.143583 |
99.171429 |
0.0 |
| sinaloa |
5581.0 |
0 |
5581.0 |
1338.126482 |
996.194286 |
554.0 |
| sonora |
405.0 |
0 |
405.0 |
125.713012 |
78.942857 |
20.0 |
| tabasco |
622.0 |
0 |
622.0 |
180.996568 |
110.633333 |
16.0 |
| tamaulipas |
1570.0 |
0 |
1570.0 |
449.469218 |
240.294118 |
0.0 |
| veracruz |
5912.0 |
0 |
5912.0 |
1543.099267 |
1313.721429 |
600.0 |
| yucatan |
451.0 |
0 |
451.0 |
107.830483 |
66.632857 |
39.5 |
| zacatecas |
39.0 |
0 |
39.0 |
7.344405 |
2.382353 |
0.0 |
# Box plot of the loss values observed in each state.
ggplot(mango, aes(x = state, y = ag_losses)) +
  geom_boxplot() +
  labs(x = "State", y = "Losses (ha)") +
  # State names are rotated to vertical so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
## Warning: Removed 143 rows containing non-finite values (stat_boxplot).

# For each state, count the rows that have a non-missing losses value.
# `.groups = "drop"` makes the ungrouped result explicit and avoids the
# "ungrouping output" message in the rendered report.
number_obs <- mango %>%
  group_by(state) %>%
  summarise(obs = sum(!is.na(ag_losses)), .groups = "drop")
## `summarise()` ungrouping output (override with `.groups` argument)
# States with at least 35 non-missing losses observations (`obs` is an
# integer count, so this is the same cut-off as "more than 34").
mango_complete <- filter(number_obs, obs >= 35)
mango_complete
## # A tibble: 17 x 2
## state obs
## <chr> <int>
## 1 baja california sur 35
## 2 campeche 35
## 3 colima 35
## 4 guerrero 35
## 5 jalisco 35
## 6 mexico 35
## 7 michoacan 35
## 8 morelos 35
## 9 nayarit 35
## 10 oaxaca 35
## 11 puebla 35
## 12 queretaro 35
## 13 san luis potosi 35
## 14 sinaloa 35
## 15 sonora 35
## 16 veracruz 35
## 17 yucatan 35
# Faceted time series of losses: one panel per state, each with its own
# y-axis range. Panels of the states listed in `mango_complete` are outlined
# in red.
mango_ts <- mango %>%
  ggplot(aes(year, ag_losses)) +
  geom_line() +
  ylab("Losses (ha)") +
  xlab("Years") +
  ggtitle("Mango - Losses (planted-harvested) (ha) 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  # Border layer. `mango_complete` has one row per qualifying state, so one
  # rectangle is drawn per panel instead of one per observation.
  # `inherit.aes = FALSE` is needed because that table only carries `state`
  # and `obs`, not the `year` / `ag_losses` columns mapped by the main plot.
  geom_rect(
    data = mango_complete,
    inherit.aes = FALSE,
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf,
    ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Alternative with a shared y axis: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).
